
* Make short-named copies of the air-handler and water-heater variables.
* Every generate runs under capture, so a pair that cannot be created
* (source variable absent, or target name already in use) is skipped silently
* and the do-file carries on.
foreach pair in "ah_location system1locationairhandler" ///
		"ah_location_st system1locationairhandler_st" ///
		"ah_type system1airhandlertype" ///
		"ah_type_st system1airhandlertype_st" ///
		"wh_tanktype waterheatertanktype" ///
		"wh_tanktype_st waterheatertanktype_st" {
	* first token is the new name, the remainder is the source variable
	gettoken newname oldname : pair
	cap gen `newname'=`oldname'
}

* Shorten the duct-type variable name (best effort, errors ignored).
* A second identical rename directly after this one could never succeed,
* because duct_type exists as soon as the first attempt works, so one call
* is sufficient.
cap rename ductsystem1type duct_type


* Z-score three indicator variables, using the mean and sd computed on the
* observations where pricediff is non-missing. The resulting *_st variable
* is filled for every observation that has the underlying indicator.

* notraddr is 1 when rrec equals 0, 0 otherwise, and missing when rrec is
* missing. Created under capture so an existing notraddr is left untouched.
cap gen notraddr=(rrec==0) if !missing(rrec)

foreach v in notwinrec twosystems notraddr {
	sum `v' if !missing(pricediff)
	* capture-gen followed by replace: create the variable when it is absent,
	* and overwrite it when it already exists. Previously notraddr_st lacked
	* the replace step, so a pre-existing notraddr_st would have been left
	* stale while the other two were refreshed.
	cap gen `v'_st=(`v'-r(mean))/r(sd)
	replace `v'_st=(`v'-r(mean))/r(sd)
}


* Give the fuel-type variables (raw and standardized) short names.
* These renames are not captured: a missing source variable stops the run.
foreach pair in "waterheaterfueltype whftype" ///
		"furnacefueltype fftype" ///
		"furnacefueltype_st fftype_st" ///
		"waterheaterfueltype_st whftype_st" {
	gettoken oldname newname : pair
	rename `oldname' `newname'
}

* Best-effort renames; any failure is ignored.
* NOTE(review): this file already tries to create ah_type and ah_location as
* copies of the same source variables near the top, so the first two renames
* here look like they can never succeed. Confirm and consider removing them.
foreach pair in "system1airhandlertype ah_type" ///
		"system1locationairhandler ah_location" ///
		"ductsystem1type_st duct_type_st" {
	gettoken oldname newname : pair
	cap rename `oldname' `newname'
}


* Variable lists for the two broad composites (raw names).
local less_observable_vars "atticrvalue ductravg duct_type negleakavg rsavg  eeravg"
local more_observable_vars "ah_location sizeavg progtherm fftype whftype negsysageavg"
local other_less_observable_vars "ah_type notwinrec"
local other_more_observable_vars "twosystems notraddr"

* Matching lists of the standardized (_st) versions.
local less_observable_vars_st "atticrvalue_st ductravg_st duct_type_st negleakavg_st rsavg_st  eeravg_st"
local more_observable_vars_st "ah_location_st sizeavg_st progtherm_st fftype_st whftype_st negsysageavg_st"
local other_less_observable_vars_st "ah_type_st notwinrec_st"
local other_more_observable_vars_st "twosystems_st notraddr_st"

local extravars "wh_tanktype"

* Full component list of each composite, and how many components it has.
local obs_less_all_st `less_observable_vars_st' `other_less_observable_vars_st'
local obs_more_all_st `more_observable_vars_st' `other_more_observable_vars_st'
local obs_less_n : word count `obs_less_all_st'
local obs_more_n : word count `obs_more_all_st'

* Row means of the standardized components.
egen less_observable=rowmean(`obs_less_all_st')
egen more_observable=rowmean(`obs_more_all_st')

* rowmean averages over whatever is non-missing, so count the non-missing
* components and blank out rows that do not have every one of them.
egen count_nm_less=rownonmiss(`obs_less_all_st')
egen count_nm_more=rownonmiss(`obs_more_all_st')

replace less_observable=. if count_nm_less!=`obs_less_n'
replace more_observable=. if count_nm_more!=`obs_more_n'



* Reduced variable sets: one "less" group and two "more" groups.
local parsimonious_less_vars "negleakavg atticrvalue ductravg     "
local parsimonious_more_vars_fuel "fftype whftype "
local parsimonious_more_vars_other "ah_location progtherm twosystems"

* Every raw variable that enters either broad composite.
local allvars "`less_observable_vars' `more_observable_vars' `other_less_observable_vars' `other_more_observable_vars'"

* Standardized counterparts of the reduced sets.
local parsimonious_less_vars_st "ductravg_st negleakavg_st atticrvalue_st"
local parsimonious_more_vars_fuel_st "fftype_st whftype_st"
local parsimonious_more_vars_other_st "ah_location_st progtherm_st  twosystems_st"

* The two "more" groups pooled together.
local pars_more_all_st `parsimonious_more_vars_fuel_st' `parsimonious_more_vars_other_st'

* Equal-weight row means for each group and for the pooled "more" set.
egen parsimonious_less=rowmean(`parsimonious_less_vars_st')
egen parsimonious_more_fuel=rowmean(`parsimonious_more_vars_fuel_st')
egen parsimonious_more_other=rowmean(`parsimonious_more_vars_other_st')

egen parsimonious_more=rowmean(`pars_more_all_st')

* Discard any earlier counters with these names (dropped one at a time so a
* missing one does not prevent dropping the other), then recount.
foreach counter in count_nm_less count_nm_more {
	cap drop `counter'
}
egen count_nm_less=rownonmiss(`parsimonious_less_vars_st')
egen count_nm_More_Fuel=rownonmiss(`parsimonious_more_vars_fuel_st')
egen count_nm_More_Other=rownonmiss(`parsimonious_more_vars_other_st')
egen count_nm_more=rownonmiss(`pars_more_all_st')

* Keep a composite only where all of its components are observed.
local pars_less_n : word count `parsimonious_less_vars_st'
local pars_fuel_n : word count `parsimonious_more_vars_fuel_st'
local pars_other_n : word count `parsimonious_more_vars_other_st'
local pars_more_n : word count `pars_more_all_st'

replace parsimonious_less=. if count_nm_less!=`pars_less_n'
replace parsimonious_more_fuel=. if count_nm_More_Fuel!=`pars_fuel_n'
replace parsimonious_more_other=. if count_nm_More_Other!=`pars_other_n'
replace parsimonious_more=. if count_nm_more!=`pars_more_n'



* Factor definitions stored as numbered locals: fac1..fac3 hold the factor
* name, facN_anchor one variable and facN_other the remaining variables.
* NOTE(review): nothing in the visible part of this file reads these locals;
* confirm whether they are still needed.

* Factor 1
local k=1
local fac`k'="More_Other"
local fac`k'_anchor="ah_location "
local fac`k'_other= "progtherm twosystems"

* Factor 2
local ++k
local fac`k'="Less"
local fac`k'_anchor="negleakavg "
local fac`k'_other= " atticrvalue ductravg "

* Factor 3
local ++k
local fac`k'="More_Fuel"
local fac`k'_anchor="fftype"
local fac`k'_other= "whftype"



* Attach the contents of load_tt_from_test.dta to every observation by
* merging on a constant key (dum equal to 1 in the data in memory).
* NOTE(review): the path below is absolute, user-specific and mixes slash
* styles, while the save at the end of this file uses a relative path.
* Consider switching to a relative or configurable location.
cap drop _merge
cap drop dum
cap gen dum=1

merge m:1 dum using "C:/Users/awcassidy1\Dropbox\jmp_new\cleaned_data/load_tt_from_test.dta"

* Start each factor score at zero.
foreach factor in Less More_Fuel More_Other {
	gen `factor'=0
}

* Which variables feed which factor.
local members_Less `parsimonious_less_vars'
local members_More_Fuel `parsimonious_more_vars_fuel'
local members_More_Other `parsimonious_more_vars_other'

* Score = sum over member variables of (standardized value) x (weight).
* The weight variables are named <var>_<factor>; presumably they come from
* the merged file - verify against its contents.
foreach factor in Less More_Fuel More_Other {
	foreach item of local members_`factor' {
		replace `factor' = `factor'+ `item'_st*`item'_`factor'
	}
}

* Indicator for a Less score strictly above its median (missing when Less
* is missing), followed by a frequency table of the indicator.
summarize Less, detail
gen above_median_less=cond(missing(Less), ., Less>r(p50))
tabulate above_median_less

* Display labels for the three factor scores.
label variable Less "Less"
label variable More_Other "More-Other"
label variable More_Fuel "More-Fuel"



* Label the equal-weight composites BEFORE standardizing. The loop below
* copies each variable's label onto its _st version, so assigning these
* labels afterwards (as was done previously) left parsimonious_less_st,
* parsimonious_more_fuel_st and parsimonious_more_other_st with blank labels.
la var parsimonious_less "Less"
la var parsimonious_more_fuel "More-Fuel"
la var parsimonious_more_other "More-Other"

* Variables to z-score over the full sample.
local list_to_standardize parsimonious_less parsimonious_more_fuel ///
	parsimonious_more_other Less More_Fuel More_Other conditionedsqft less_observable more_observable above_median_less

* For each variable create <name>_st = (x - mean) / sd and carry over the
* variable label of the source.
foreach w in `list_to_standardize'  {
	sum `w' 
	gen `w'_st=(`w'-r(mean))/(r(sd))
	la var `w'_st "`: var label `w''"
	}
	
* Sums of the two broad composites, in raw and standardized form.
gen sum=less_observable+more_observable
gen sum_st=less_observable_st+more_observable_st


* Split the full variable list into the variables used by the three factors
* (extracted) and everything else (not_extracted).
local extracted `parsimonious_less_vars' `parsimonious_more_vars_fuel' `parsimonious_more_vars_other'

* Fail early if any extracted variable is absent from the data, as the
* previous varlist-based loop did.
confirm variable `extracted'

* Token-wise list difference. The earlier approach deleted each extracted
* name as a substring of the whole list, which would also have mangled any
* other variable whose name merely contains an extracted name.
local not_extracted : list allvars - extracted

* Overall EE measure: the three factor scores added together.
gen EE=Less+More_Fuel+More_Other

* Indicator for EE strictly above its median; missing where EE is missing.
summarize EE, detail
gen above_median_EE=cond(missing(EE), ., EE>r(p50))

* Alternative overall measure: sum of the two broad composites.
gen EE_all_mean=less_observable+more_observable


* Declare the panel: mlsid is the unit, relsalenum the time index. The
* lead and lag operators below step along relsalenum within mlsid.
xtset mlsid relsalenum

* has_main: the mlsid has at least one record with non-missing pricediff.
cap drop dum
gen dum=!missing(pricediff)
egen has_main=max(dum), by(mlsid)

* has_pre: the mlsid has at least one record with non-missing
* pricediff_baseline. dum is left in the data holding this second indicator.
cap drop dum
gen dum=!missing(pricediff_baseline)
egen has_pre=max(dum), by(mlsid)

* pre: next record has pricediff_baseline and the one after has pricediff.
gen pre=!missing(f.pricediff_baseline) & !missing(f2.pricediff)

* saleneg1: this record has pricediff_baseline and the next has pricediff.
gen saleneg1=!missing(pricediff_baseline) & !missing(f.pricediff)

* main: this record has pricediff and the previous has pricediff_baseline.
gen main=!missing(pricediff) & !missing(l.pricediff_baseline)

* bal_sample: record belongs to one of the three positions above, within an
* mlsid that has both a main and a baseline observation.
gen bal_sample=(has_main==1 & has_pre==1 & (pre==1 | main==1 | saleneg1==1))



* Indicator for EE_all_mean strictly above its median (missing where
* EE_all_mean is missing), then summary statistics of the indicator.
summarize EE_all_mean, detail
gen above_median_ee_all_mean=cond(missing(EE_all_mean), ., EE_all_mean>r(p50))
summarize above_median_ee_all_mean

label variable above_median_ee_all_mean "Above med EE (averaged z-score over 16 measures)"

* Principal components over the factor variables followed by the remaining
* measures; the results are displayed only, nothing is stored here.
pca `parsimonious_less_vars' `parsimonious_more_vars_fuel' `parsimonious_more_vars_other' `not_extracted'


* Change in price from the previous record, filled only for balanced-sample
* records whose next record has post equal to 1.
gen new_pricediff_baseline=price-l.price if f.post==1 & bal_sample==1
label variable new_pricediff_baseline "Easier to work with pricediff_baseline"

* Sale year of the previous record, under the same condition plus a
* non-missing pricediff on the next record.
gen new_lsaleyear_baseline=l.saleyear if f.post==1 & bal_sample==1 & !missing(f.pricediff)

* Above-median indicators for the two broad composites; each is missing
* where its composite is missing.
foreach side in less more {
	summarize `side'_observable, detail
	gen above_median_`side'_all=cond(missing(`side'_observable), ., `side'_observable>r(p50))
}

* Second round of z-scoring: for each listed variable create <name>_st as
* (value - mean) / sd, with mean and sd taken over all observations.
local list_to_standardize EE_all_mean above_median_EE ///
	above_median_ee_all_mean above_median_less_all ///
	above_median_more_all parsimonious_more

foreach w of local list_to_standardize {
	summarize `w'
	gen `w'_st=(`w'-r(mean))/(r(sd))
}

di "`not_extracted'"

* Build the standardized names of the non-extracted variables. The local
* not_extracted_st was referenced here but never defined, so it expanded to
* nothing and the total silently covered only the fuel and other "more"
* variables instead of every measure outside the Less factor.
local not_extracted_st
foreach v of local not_extracted {
	local not_extracted_st `not_extracted_st' `v'_st
}

* Everything except the Less factor variables, in standardized form.
local the_rest_vars_st `parsimonious_more_vars_fuel_st' `parsimonious_more_vars_other_st' `not_extracted_st'

* Row total, kept only where every component is non-missing (rowtotal on its
* own treats missing components as zero).
egen the_rest_total = rowtotal(`the_rest_vars_st')
egen the_rest_wc=rownonmiss(`the_rest_vars_st')
replace the_rest_total = . if the_rest_wc!=`: word count `the_rest_vars_st''




* Final display labels for the output dataset.
label variable More_Fuel "More-Fuel"
label variable More_Other "More-Other"
label variable closeprice1 "Price at Sale 1"
label variable wh_tanktype "WH Tank Type"
label variable notraddr "Did not Rec Add Attic R"
label variable the_rest_total "Overall EE except Less"

* Raw and standardized versions share the same label text.
foreach v in EE_all_mean EE_all_mean_st {
	label variable `v' "Overall EE"
}
foreach v in less_observable less_observable_st {
	label variable `v' "Less (8-var)"
}
foreach v in more_observable more_observable_st {
	label variable `v' "More (8-var)"
}

* Write the result, overwriting any existing file of the same name.
save "../cleaned_data/data_with_observability_indices.dta", replace

